1 Load data

### Load required libraries
library(readxl)
library(tidyverse)
library(purrr)
library(dplyr)
library(knitr)
library(DT)
library(httr)
library(jsonlite)
library(stringr)
library(GSEABase)
library(org.Hs.eg.db)

### Load functions
source(file = "../02_functions.R")

### Define paths
current_dir <- getwd()
data_path <- file.path(current_dir,
                       "../../data")
results_pc_path <- file.path(current_dir,
                              "../../results/output/patient_characteristics")
results_fip_path <- file.path(current_dir,
                              "../../results/output/somatic_mutation_analysis/functional_impact_prediction")
results_gor_path <- file.path(current_dir, 
                                 "../../results/output/somatic_mutation_analysis/gene_overrepresentation_analysis")
figures_path <- file.path(current_dir, 
                                 "../../results/figures")

maf_df_path <- file.path(results_fip_path, "all_mutations.csv") # All mutations df
nonsyn_df_path <- file.path(results_fip_path, "nonsyn_mutations.csv") # Nonsyn mutations df
lof_df_path <- file.path(results_fip_path, "lof_mutations.csv") # LoF mutations df

# Sup1 data
sup1_response_path <- file.path(results_pc_path, 
                           "sup1_response.csv")
### Read data & wrangle

# Read 3 lists of mutations

# All
maf_df_annotated <- read.csv(maf_df_path, header = TRUE)[ , -1]
list_of_all_mutations <- split(maf_df_annotated, maf_df_annotated$sample_id
                               )
# Nonsyn
nonsyn_df_annotated <- read.csv(nonsyn_df_path, header = TRUE)[ , -1]
list_of_nonsyn_mutations <- split(nonsyn_df_annotated, nonsyn_df_annotated$sample_id)

# LoF
lof_df_annotated <- read.csv(lof_df_path, header = TRUE)[ , -1]
list_of_lof_mutations <- split(lof_df_annotated, lof_df_annotated$sample_id)

# Get the list of patient IDs
sample_id_list <- unique(maf_df_annotated$sample_id)

## Read sup1_response.csv
sup1_response_df <- read.csv(sup1_response_path)[ , -1] # Drop first column

# df to merge response info later
response_df <- sup1_response_df %>% 
  dplyr::rename(sample_id = Sample.ID,
                patient_response = Patient.Response) %>% 
  dplyr::select(sample_id, patient_response)

# Create a list of Responders and Non-responders
sample_id_lists <- split(sup1_response_df$Sample.ID, 
                         sup1_response_df$Patient.Response) # Split the "Sample.ID" values into lists based on "Patient.Response"

r_sample_ids <- sample_id_lists$R
nr_sample_ids <- sample_id_lists$NR

2 Pathway analysis (gene overrepresentation)

2.1 First analysis using C5 - BP

# Define paths
fora_results_all_csv_path <- file.path(results_gor_path, "fora_results_c5bp_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_gor_path, "fora_results_c5bp_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_gor_path, "fora_results_c5bp_lof.csv")

# # Perform FORA analysis for H Gene Sets
# results_list <- perform_fora_analysis(list_of_all_mutations, list_of_nonsyn_mutations, list_of_lof_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="C5", subcategory="BP")

# Load fora results
fora_results_c5bp_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_c5bp_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_c5bp_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]

2.1.1 Visualization: Top C5 BP across samples stratified by response (All mutations)

# Define output path
mm909_c5bp_all_plot_path <- file.path(figures_path,
                              "mm909_c5bp_all_plot.png")

mm909_c5bp_all_plot <- plot_top5(response_df, fora_results_c5bp_all_df, mm909_c5bp_all_plot_path, "FORA with top C5 BP across samples (All mutations)", "Top C5 BP Gene Sets")

print(mm909_c5bp_all_plot)

2.1.2 Visualization: Top C5 BP across samples stratified by response (Nonsyn mutations)

# Define output path
mm909_c5bp_nonsyn_plot_path <- file.path(figures_path,
                              "mm909_c5bp_nonsyn_plot.png")

mm909_c5bp_nonsyn_plot <- plot_top5(response_df, fora_results_c5bp_nonsyn_df, mm909_c5bp_nonsyn_plot_path, "FORA with top C5 BP across samples (Nonsyn mutations)", "Top C5 BP Gene Sets")

print(mm909_c5bp_nonsyn_plot)

2.1.3 Visualization: Top C5 BP across samples stratified by response (LoF mutations)

# Define output path
mm909_c5bp_lof_plot_path <- file.path(figures_path,
                              "mm909_c5bp_lof_plot.png")

mm909_c5bp_lof_plot <- plot_top5(response_df, fora_results_c5bp_lof_df, mm909_c5bp_lof_plot_path, "FORA with top C5 BP across samples (LoF mutations)", "Top C5 BP Gene Sets")

print(mm909_c5bp_lof_plot)

2.2 Second analysis using C5 - CC

# Define paths
fora_results_all_csv_path <- file.path(results_gor_path, "fora_results_c5cc_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_gor_path, "fora_results_c5cc_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_gor_path, "fora_results_c5cc_lof.csv")

# # Perform FORA analysis for H Gene Sets
# results_list <- perform_fora_analysis(list_of_all_mutations, list_of_nonsyn_mutations, list_of_lof_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="C5", subcategory="CC")

# Load fora results
fora_results_c5cc_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_c5cc_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_c5cc_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]

2.2.1 Visualization: C5 CC PROTEASOME across samples stratified by response (LoF mutations)

# Define output path
mm909_c5cc_proteasome_lof_plot_path <- file.path(figures_path,
                              "mm909_c5cc_proteasome_lof_plot.png")

proteasome_genesets = c("GOCC_PROTEASOME_ACCESSORY_COMPLEX", "GOCC_PROTEASOME_COMPLEX", "GOCC_PROTEASOME_CORE_COMPLEX", "GOCC_PROTEASOME_CORE_COMPLEX_ALPHA_SUBUNIT_COMPLEX", "GOCC_PROTEASOME_CORE_COMPLEX_BETA_SUBUNIT_COMPLEX", "GOCC_PROTEASOME_REGULATORY_PARTICLE", "GOCC_PROTEASOME_REGULATORY_PARTICLE_BASE_SUBCOMPLEX", "GOCC_PROTEASOME_REGULATORY_PARTICLE_LID_SUBCOMPLEX")

mm909_c5cc_proteasome_lof_plot <- plot_geneset(proteasome_genesets, response_df, fora_results_c5cc_lof_df, mm909_c5cc_proteasome_lof_plot_path, "FORA with PROTEASOME-related C5 CC Gene Sets across samples (LoF mutations)", "PROTEASOME Gene Sets")

print(mm909_c5cc_proteasome_lof_plot)

2.3 Third analysis using H

# Define paths
fora_results_all_csv_path <- file.path(results_gor_path, "fora_results_h_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_gor_path, "fora_results_h_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_gor_path, "fora_results_h_lof.csv")

# # Perform FORA analysis for H Gene Sets
# results_list <- perform_fora_analysis(list_of_all_mutations, list_of_nonsyn_mutations, list_of_lof_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="H")

# Load fora results
fora_results_h_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_h_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_h_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]

2.3.1 Visualization: Top H across samples stratified by response (All mutations)

# Define output path
mm909_h_all_plot_path <- file.path(figures_path,
                              "mm909_all_h_plot.png")

mm909_h_all_plot <- plot_top5(response_df, fora_results_h_all_df, mm909_h_all_plot_path, "FORA with top H across samples (All mutations)", "Top H Gene Sets")

print(mm909_h_all_plot)

2.3.2 Visualization: Top H across samples stratified by response (Nonsyn mutations)

# Define output path
mm909_h_nonsyn_plot_path <- file.path(figures_path,
                              "mm909_nonsyn_h_plot.png")

mm909_h_nonsyn_plot <- plot_top5(response_df, fora_results_h_nonsyn_df, mm909_h_nonsyn_plot_path, "FORA with top H across samples (Nonsyn mutations)", "Top H Gene Sets")

print(mm909_h_nonsyn_plot)

2.3.3 Visualization: Top H across samples stratified by response (LoF mutations)

# Define output path
mm909_h_lof_plot_path <- file.path(figures_path,
                              "mm909_lof_h_plot.png")

mm909_h_lof_plot <- plot_top5(response_df, fora_results_h_lof_df, mm909_h_lof_plot_path, "FORA with top H across samples (LoF mutations)", "Top H Gene Sets")

print(mm909_h_lof_plot)

2.3.4 Visualization: HALLMARK_APOPTOSIS across samples stratified by response (LoF)

# Define output path
mm909_h_apoptosis_plot_path <- file.path(figures_path,
                              "mm909_h_apoptosis_plot.png")

mm909_h_apoptosis_plot <- plot_geneset(c("HALLMARK_APOPTOSIS"), response_df, fora_results_h_lof_df, mm909_h_apoptosis_plot_path, "FORA with HALLMARK_APOPTOSIS across samples (LoF mutations)", "HALLMARK_APOPTOSIS Gene Set")

print(mm909_h_apoptosis_plot)

2.4 Fourth analysis using C6 (oncogenic)

# Define paths
fora_results_all_csv_path <- file.path(results_gor_path, "fora_results_c6_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_gor_path, "fora_results_c6_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_gor_path, "fora_results_c6_lof.csv")

# # Perform FORA analysis for C6 Gene Sets
# results_list <- perform_fora_analysis(list_of_all_mutations, list_of_nonsyn_mutations, list_of_lof_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="C6")

# Load fora results
fora_results_c6_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_c6_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_c6_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]

2.4.1 Visualization: Top C6 across samples stratified by response (All mutations)

# Define output path
mm909_c6_all_plot_path <- file.path(figures_path,
                              "mm909_c6_all_plot.png")

mm909_c6_all_plot <- plot_top5(response_df, fora_results_c6_all_df, mm909_c6_all_plot_path, "FORA with top C6 across samples (All mutations)", "Top C6 Gene Sets")

print(mm909_c6_all_plot)

2.4.2 Visualization: Top C6 across samples stratified by response (Nonsyn mutations)

# Define output path
mm909_c6_nonsyn_plot_path <- file.path(figures_path,
                              "mm909_c6_nonsyn_plot.png")

mm909_c6_nonsyn_plot <- plot_top5(response_df, fora_results_c6_nonsyn_df, mm909_c6_nonsyn_plot_path, "FORA with top C6 across samples (Nonsyn mutations)", "Top C6 Gene Sets")

print(mm909_c6_nonsyn_plot)

2.4.3 Visualization: Top C6 across samples stratified by response (LoF mutations)

# Define output path
mm909_c6_lof_plot_path <- file.path(figures_path,
                              "mm909_c6_lof_plot.png")

mm909_c6_lof_plot <- plot_top5(response_df, fora_results_c6_lof_df, mm909_c6_lof_plot_path, "FORA with top C6 across samples (LoF mutations)", "Top C6 Gene Sets")

print(mm909_c6_lof_plot)

2.5 Fifth analysis using C7 (immunologic)

# Define paths
fora_results_all_csv_path <- file.path(results_gor_path, "fora_results_c7_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_gor_path, "fora_results_c7_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_gor_path, "fora_results_c7_lof.csv")

# # Perform FORA analysis for C7 Gene Sets
# results_list <- perform_fora_analysis(list_of_all_mutations, list_of_nonsyn_mutations, list_of_lof_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="C7")

# Load fora results
fora_results_c7_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_c7_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_c7_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]

2.5.1 Visualization: Top C7 across samples stratified by response (All mutations)

# Define output path
mm909_c7_all_plot_path <- file.path(figures_path,
                              "mm909_c7_all_plot.png")

mm909_c7_all_plot <- plot_top5(response_df, fora_results_c7_all_df, mm909_c7_all_plot_path, "FORA with top C7 across samples (All mutations)", "Top C7 Gene Sets")

print(mm909_c7_all_plot)

2.5.2 Visualization: Top C7 across samples stratified by response (Nonsyn mutations)

# Define output path
mm909_c7_nonsyn_plot_path <- file.path(figures_path,
                              "mm909_c7_nonsyn_plot.png")

mm909_c7_nonsyn_plot <- plot_top5(response_df, fora_results_c7_nonsyn_df, mm909_c7_nonsyn_plot_path, "FORA with top C7 across samples (Nonsyn mutations)", "Top C7 Gene Sets")

print(mm909_c7_nonsyn_plot)

2.5.3 Visualization: Top C7 across samples stratified by response (LoF mutations)

# Define output path
mm909_c7_lof_plot_path <- file.path(figures_path,
                              "mm909_c7_lof_plot.png")

mm909_c7_lof_plot <- plot_top5(response_df, fora_results_c7_lof_df, mm909_c7_lof_plot_path, "FORA with top C7 across samples (LoF mutations)", "Top C7 Gene Sets")

print(mm909_c7_lof_plot)

2.6 Sixth analysis using MSigDB filter of “Ubiquitin”

# Define paths
fora_results_all_csv_path <- file.path(results_gor_path, "fora_results_ubiquitin_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_gor_path, "fora_results_ubiquitin_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_gor_path, "fora_results_ubiquitin_lof.csv")

# # Load GMT file as a GeneSetCollection object
# gene_set_collection <- getGmt(file.path(data_path, "genesets_ubiquitin.v2023.2.Hs.gmt"))
# 
# # Initialize an empty list to store the results
# gene_sets_list <- list()
# 
# # Iterate over each GeneSet in the GeneSetCollection
# for (setName in names(gene_set_collection)) {
#   # Extract the GeneSet by its name
#   geneSet <- gene_set_collection[[setName]]
# 
#   # Convert Hugo symbols to Ensembl IDs for this GeneSet
#   ensembl_ids <- SYMBOLtoENSEMBL(geneIds(geneSet))
# 
#   # Ensure the result is a character vector of Ensembl Gene IDs
#   ensembl_ids_vector <- as.character(ensembl_ids)
# 
#   # Store the converted IDs in the list, associated with the setName
#   gene_sets_list[[setName]] <- ensembl_ids_vector
# }
# 
# # Set names for each element in the list to match the setName
# names(gene_sets_list) <- names(gene_set_collection)
# 
# # Perform FORA analysis for Ubiquitin Gene Sets
# results_list <- perform_fora_analysis(list_of_all_mutations, list_of_nonsyn_mutations, list_of_lof_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, gene_sets_list=gene_sets_list)

# Load fora results
fora_results_ubiquitin_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_ubiquitin_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_ubiquitin_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]

2.6.1 Visualization: Top Ubiquitin GS across samples stratified by response (All mutations)

# Define output path
mm909_ubiquitin_all_plot_path <- file.path(figures_path,
                              "mm909_ubiquitin_all_plot.png")

mm909_ubiquitin_all_plot <- plot_top5(response_df, fora_results_ubiquitin_all_df, mm909_ubiquitin_all_plot_path, "FORA with top Ubiquitin GS across samples (All mutations)", "Top Ubiquitin Gene Sets")

print(mm909_ubiquitin_all_plot)

2.6.2 Visualization: Top Ubiquitin GS across samples stratified by response (Nonsyn mutations)

# Define output path
mm909_ubiquitin_nonsyn_plot_path <- file.path(figures_path,
                              "mm909_ubiquitin_nonsyn_plot.png")

mm909_ubiquitin_nonsyn_plot <- plot_top5(response_df, fora_results_ubiquitin_nonsyn_df, mm909_ubiquitin_nonsyn_plot_path, "FORA with top Ubiquitin GS across samples (Nonsyn mutations)", "Top Ubiquitin Gene Sets")

print(mm909_ubiquitin_nonsyn_plot)

2.6.3 Visualization: Top Ubiquitin GS across samples stratified by response (LoF mutations)

# Define output path
mm909_ubiquitin_lof_plot_path <- file.path(figures_path,
                              "mm909_ubiquitin_lof_plot.png")

mm909_ubiquitin_lof_plot <- plot_top5(response_df, fora_results_ubiquitin_lof_df, mm909_ubiquitin_lof_plot_path, "FORA with top Ubiquitin GS across samples (LoF mutations)", "Top Ubiquitin Gene Sets")

print(mm909_ubiquitin_lof_plot)

2.7 Seventh analysis using C2:CGP (chemical & genetic perturbations)

# Define paths
fora_results_all_csv_path <- file.path(results_gor_path, "fora_results_c2cgp_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_gor_path, "fora_results_c2cgp_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_gor_path, "fora_results_c2cgp_lof.csv")

# # Perform FORA analysis for C2_CGP Gene Sets
# results_list <- perform_fora_analysis(list_of_all_mutations, list_of_nonsyn_mutations, list_of_lof_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="C2", subcategory="CGP")

# Load fora results
fora_results_c2cgp_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_c2cgp_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_c2cgp_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]

2.7.1 Visualization: Top C2:CGP across samples stratified by response (All mutations)

# Define output path
mm909_c2cgp_all_plot_path <- file.path(figures_path,
                              "mm909_c2cgp_all_plot.png")

mm909_c2cgp_all_plot <- plot_top5(response_df, fora_results_c2cgp_all_df, mm909_c2cgp_all_plot_path, "FORA with top C2:CGP across samples (All mutations)", "Top C2:CGP Gene Sets")

print(mm909_c2cgp_all_plot)

2.7.2 Visualization: Top C2:CGP across samples stratified by response (Nonsyn mutations)

# Define output path
mm909_c2cgp_nonsyn_plot_path <- file.path(figures_path,
                              "mm909_c2cgp_nonsyn_plot.png")

mm909_c2cgp_nonsyn_plot <- plot_top5(response_df, fora_results_c2cgp_nonsyn_df, mm909_c2cgp_nonsyn_plot_path, "FORA with top C2:CGP across samples (Nonsyn mutations)", "Top C2:CGP Gene Sets")

print(mm909_c2cgp_nonsyn_plot)

2.7.3 Visualization: Top C2:CGP across samples stratified by response (LoF mutations)

# Define output path
mm909_c2cgp_lof_plot_path <- file.path(figures_path,
                              "mm909_c2cgp_lof_plot.png")

mm909_c2cgp_lof_plot <- plot_top5(response_df, fora_results_c2cgp_lof_df, mm909_c2cgp_lof_plot_path, "FORA with top C2:CGP across samples (LoF mutations)", "Top C2:CGP Gene Sets")

print(mm909_c2cgp_lof_plot)

2.8 Eighth analysis using C2:CP:KEGG_MEDICUS

# Define paths
fora_results_all_csv_path <- file.path(results_gor_path, "fora_results_keggmed_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_gor_path, "fora_results_keggmed_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_gor_path, "fora_results_keggmed_lof.csv")

# # Load GMT file as a GeneSetCollection object
# gene_set_collection <- getGmt(file.path(data_path, "c2.cp.kegg_medicus.v2023.2.Hs.entrez.gmt"))
# 
# # Initialize an empty list to store the results
# gene_sets_list <- list()
# 
# # Iterate over each GeneSet in the GeneSetCollection
# for (setName in names(gene_set_collection)) {
#   # Extract the GeneSet by its name
#   geneSet <- gene_set_collection[[setName]]
# 
#   # Convert Entrez IDs to Ensembl IDs for this GeneSet
#   ensembl_ids <- ENTREZtoENSEMBL(geneIds(geneSet))
# 
#   # Ensure the result is a character vector of Ensembl Gene IDs
#   ensembl_ids_vector <- as.character(ensembl_ids)
# 
#   # Store the converted IDs in the list, associated with the setName
#   gene_sets_list[[setName]] <- ensembl_ids_vector
# }
# 
# # Set names for each element in the list to match the setName
# names(gene_sets_list) <- names(gene_set_collection)
# 
# # Perform FORA analysis for KEGG_MEDICUS Gene Sets
# results_list <- perform_fora_analysis(list_of_all_mutations, list_of_nonsyn_mutations, list_of_lof_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, gene_sets_list=gene_sets_list)

# Load fora results
fora_results_keggmed_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_keggmed_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_keggmed_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]

2.8.1 Visualization: Top C2:CP:KEGG_MEDICUS across samples stratified by response (All mutations)

# Define output path
mm909_keggmed_all_plot_path <- file.path(figures_path,
                              "mm909_keggmed_all_plot.png")

mm909_keggmed_all_plot <- plot_top5(response_df, fora_results_keggmed_all_df, mm909_keggmed_all_plot_path, "FORA with top C2:CP:KEGG_MEDICUS across samples (All mutations)", "Top C2:CP:KEGG_MEDICUS Gene Sets")

print(mm909_keggmed_all_plot)

2.8.2 Visualization: Top C2:CP:KEGG_MEDICUS across samples stratified by response (Nonsyn mutations)

# Define output path
mm909_keggmed_nonsyn_plot_path <- file.path(figures_path,
                              "mm909_keggmed_nonsyn_plot.png")

mm909_keggmed_nonsyn_plot <- plot_top5(response_df, fora_results_keggmed_nonsyn_df, mm909_keggmed_nonsyn_plot_path, "FORA with top C2:CP:KEGG_MEDICUS across samples (Nonsyn mutations)", "Top C2:CP:KEGG_MEDICUS Gene Sets")

print(mm909_keggmed_nonsyn_plot)

2.8.3 Visualization: Top C2:CP:KEGG_MEDICUS across samples stratified by response (LoF mutations)

# Define output path
mm909_keggmed_lof_plot_path <- file.path(figures_path,
                              "mm909_keggmed_lof_plot.png")

mm909_keggmed_lof_plot <- plot_top5(response_df, fora_results_keggmed_lof_df, mm909_keggmed_lof_plot_path, "FORA with top C2:CP:KEGG_MEDICUS across samples (LoF mutations)", "Top C2:CP:KEGG_MEDICUS Gene Sets")

print(mm909_keggmed_lof_plot)

2.9 Ninth analysis using C2:CP:REACTOME

# Define paths
fora_results_all_csv_path <- file.path(results_gor_path, "fora_results_reactome_all.csv")
fora_results_nonsyn_csv_path <- file.path(results_gor_path, "fora_results_reactome_nonsyn.csv")
fora_results_lof_csv_path <- file.path(results_gor_path, "fora_results_reactome_lof.csv")

# # Perform FORA analysis for C2_CGP Gene Sets
# results_list <- perform_fora_analysis(list_of_all_mutations, list_of_nonsyn_mutations, list_of_lof_mutations, fora_results_all_csv_path, fora_results_nonsyn_csv_path, fora_results_lof_csv_path, category="C2", subcategory="CP:REACTOME")

# Load fora results
fora_results_reactome_all_df <- read.csv(fora_results_all_csv_path, header = TRUE)[ , -1]
fora_results_reactome_nonsyn_df <- read.csv(fora_results_nonsyn_csv_path, header = TRUE)[ , -1]
fora_results_reactome_lof_df <- read.csv(fora_results_lof_csv_path, header = TRUE)[ , -1]

2.9.1 Visualization: Top C2:CP:REACTOME across samples stratified by response (All mutations)

# Define output path
mm909_reactome_all_plot_path <- file.path(figures_path,
                              "mm909_reactome_all_plot.png")

mm909_reactome_all_plot <- plot_top5(response_df, fora_results_reactome_all_df, mm909_reactome_all_plot_path, "FORA with top C2:CP:REACTOME across samples (All mutations)", "Top C2:CP:REACTOME Gene Sets")

print(mm909_reactome_all_plot)

2.9.2 Visualization: Top C2:CP:REACTOME across samples stratified by response (Nonsyn mutations)

# Define output path
mm909_reactome_nonsyn_plot_path <- file.path(figures_path,
                              "mm909_reactome_nonsyn_plot.png")

mm909_reactome_nonsyn_plot <- plot_top5(response_df, fora_results_reactome_nonsyn_df, mm909_reactome_nonsyn_plot_path, "FORA with top C2:CP:REACTOME across samples (Nonsyn mutations)", "Top C2:CP:REACTOME Gene Sets")

print(mm909_reactome_nonsyn_plot)

2.9.3 Visualization: Top C2:CP:REACTOME across samples stratified by response (LoF mutations)

# Define output path
mm909_reactome_lof_plot_path <- file.path(figures_path,
                              "mm909_reactome_lof_plot.png")

mm909_reactome_lof_plot <- plot_top5(response_df, fora_results_reactome_lof_df, mm909_reactome_lof_plot_path, "FORA with top C2:CP:REACTOME across samples (LoF mutations)", "Top C2:CP:REACTOME Gene Sets")

print(mm909_reactome_lof_plot)

3 Session Info

sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.7.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Copenhagen
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] org.Hs.eg.db_3.18.0  GSEABase_1.64.0      graph_1.80.0        
##  [4] annotate_1.80.0      XML_3.99-0.16.1      AnnotationDbi_1.64.1
##  [7] IRanges_2.36.0       S4Vectors_0.40.2     Biobase_2.62.0      
## [10] BiocGenerics_0.48.1  jsonlite_1.8.8       httr_1.4.7          
## [13] DT_0.31              knitr_1.45           lubridate_1.9.3     
## [16] forcats_1.0.0        stringr_1.5.1        dplyr_1.1.4         
## [19] purrr_1.0.2          readr_2.1.5          tidyr_1.3.1         
## [22] tibble_3.2.1         ggplot2_3.4.4        tidyverse_2.0.0     
## [25] readxl_1.4.3        
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.0        farver_2.1.1            blob_1.2.4             
##  [4] Biostrings_2.70.2       bitops_1.0-7            fastmap_1.1.1          
##  [7] RCurl_1.98-1.14         digest_0.6.34           timechange_0.3.0       
## [10] lifecycle_1.0.4         KEGGREST_1.42.0         RSQLite_2.3.5          
## [13] magrittr_2.0.3          compiler_4.3.2          rlang_1.1.3            
## [16] sass_0.4.8              tools_4.3.2             utf8_1.2.4             
## [19] yaml_2.3.8              htmlwidgets_1.6.4       bit_4.0.5              
## [22] withr_3.0.0             grid_4.3.2              fansi_1.0.6            
## [25] xtable_1.8-4            colorspace_2.1-0        scales_1.3.0           
## [28] cli_3.6.2               rmarkdown_2.25          crayon_1.5.2           
## [31] ragg_1.2.7              generics_0.1.3          rstudioapi_0.15.0      
## [34] tzdb_0.4.0              DBI_1.2.1               cachem_1.0.8           
## [37] zlibbioc_1.48.0         cellranger_1.1.0        XVector_0.42.0         
## [40] vctrs_0.6.5             hms_1.1.3               bit64_4.0.5            
## [43] systemfonts_1.0.5       jquerylib_0.1.4         glue_1.7.0             
## [46] stringi_1.8.3           gtable_0.3.4            GenomeInfoDb_1.38.6    
## [49] munsell_0.5.0           pillar_1.9.0            htmltools_0.5.7        
## [52] GenomeInfoDbData_1.2.11 R6_2.5.1                textshaping_0.3.7      
## [55] evaluate_0.23           highr_0.10              png_0.1-8              
## [58] memoise_2.0.1           bslib_0.6.1             xfun_0.42              
## [61] pkgconfig_2.0.3